Abstract
Background We have run the simplified Naomi model using a range of inference methods: TMB, aghq, adam and tmbstan.
Task In this report, we compare the accuracy of the posterior distributions obtained from these inference methods using histograms, Kolmogorov-Smirnov tests, maximum mean discrepancy and Pareto-smoothed importance sampling.
We compare the inference results from TMB, aghq, adam, and tmbstan.
Import these inference results as follows:
tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
adam <- readRDS("depends/adam.rds")
tmbstan <- readRDS("depends/tmbstan.rds")
depends <- yaml::read_yaml("orderly.yml")$depends
For more information about the conditions under which these results were generated, see:
TMBdependency_details <- function(i) {
report_name <- names(depends[[i]])
print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
print(orderly::orderly_info(report_id, report_name)$parameters)
}
dependency_details(1)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230119-142551-23704e7b and was run with the following parameters:"
## $tmb
## [1] TRUE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $ndConstruction
## [1] "product"
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
aghqdependency_details(2)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE && parameter:k == 1)"
## [1] "Obtained report had ID 20230119-170459-9b07786e and was run with the following parameters:"
## $aghq
## [1] TRUE
##
## $k
## [1] 1
##
## $ndConstruction
## [1] "product"
##
## $tmb
## [1] FALSE
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
adamdependency_details(3)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:adam == TRUE)"
## [1] "Obtained report had ID 20230209-142804-11535a68 and was run with the following parameters:"
## $adam
## [1] TRUE
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $ndConstruction
## [1] "product"
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## $area_level
## [1] 4
tmbstandependency_details(4)
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
##
## $niter
## [1] 20000
##
## $nthin
## [1] 20
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $nsample
## [1] 1000
data.frame(
"TMB" = tmb$time,
"aghq" = aghq$time,
"adam" = adam$time,
"tmbstan" = tmbstan$time
)
adam <- adam$adam
We create histograms and empirical cumulative distribution function (ECDF) plots of the samples from each method. All of the possible parameter names are as follows:
pars <- unique(names(tmb$fit$obj$env$par))
We will produce plots about the following subset of them. There is no particular reason to choose this subset rather than other, it’s quite arbitrary.
pars_eval <- pars %in% c("beta_rho", "beta_alpha", "beta_lambda", "beta_anc_rho", "beta_anc_alpha", "u_rho_x", "us_rho_x", "u_rho_xs")
names(pars_eval) <- pars
beta_rho <- histogram_and_ecdf("beta_rho", i = 1, return_df = TRUE)
saveRDS(beta_rho$df, "beta_rho.rds")
beta_rhohistogram_and_ecdf_helper <- function(par) lapply(1:sum(names(tmb$fit$obj$env$par) == par), histogram_and_ecdf, par = par)
histogram_and_ecdf_helper("beta_rho")
## [[1]]
##
## [[2]]
beta_alphahistogram_and_ecdf_helper("beta_alpha")
## [[1]]
##
## [[2]]
beta_lambdahistogram_and_ecdf_helper("beta_lambda")
## [[1]]
##
## [[2]]
beta_anc_rhohistogram_and_ecdf("beta_anc_rho")
beta_anc_alphahistogram_and_ecdf("beta_anc_alpha")
logitlapply(pars[stringr::str_starts(pars, "logit")], histogram_and_ecdf)
log_sigmalapply(pars[stringr::str_starts(pars, "log_sigma")], histogram_and_ecdf)
u_rho_xhistogram_and_ecdf_helper("u_rho_x")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
u_rho_xhistogram_and_ecdf_helper("u_rho_x")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
us_rho_xhistogram_and_ecdf_helper("us_rho_x")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
u_rho_xshistogram_and_ecdf_helper("u_rho_xs")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
us_rho_xshistogram_and_ecdf_helper("us_rho_xs")
u_rho_ashistogram_and_ecdf_helper("u_rho_as")
u_alpha_xhistogram_and_ecdf_helper("u_alpha_x")
us_alpha_xhistogram_and_ecdf_helper("us_alpha_x")
u_alpha_xshistogram_and_ecdf_helper("u_alpha_xs")
us_alpha_xshistogram_and_ecdf_helper("us_alpha_xs")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
u_alpha_ahistogram_and_ecdf_helper("u_alpha_a")
u_alpha_ashistogram_and_ecdf_helper("u_alpha_as")
u_alpha_xahistogram_and_ecdf_helper("u_alpha_xa")
ui_lambda_xhistogram_and_ecdf_helper("ui_lambda_x")
ui_anc_rho_xhistogram_and_ecdf_helper("ui_anc_rho_x")
ui_anc_alpha_xhistogram_and_ecdf_helper("ui_anc_alpha_x")
log_or_gammahistogram_and_ecdf_helper("log_or_gamma")
ks_helper <- function(par, ...) {
to_ks_df(par) %>% ks_plot(par, ...)
}
betaks_helper("beta")
ks_helper("beta", method1 = "TMB", method2 = "adam")
logitks_helper("logit")
ks_helper("logit", method1 = "TMB", method2 = "adam")
log_sigmaks_helper("log_sigma")
ks_helper("log_sigma", method1 = "TMB", method2 = "adam")
u_rho_xks_helper("u_rho_x")
ks_helper("u_rho_x", method1 = "TMB", method2 = "adam")
u_rho_xsks_helper("u_rho_xs")
ks_helper("u_rho_xs", method1 = "TMB", method2 = "adam")
us_rho_xks_helper("us_rho_x")
ks_helper("us_rho_x", method1 = "TMB", method2 = "adam")
us_rho_xsks_helper("us_rho_xs")
ks_helper("us_rho_xs", method1 = "TMB", method2 = "adam")
u_rho_aks_helper("u_rho_a")
ks_helper("u_rho_a", method1 = "TMB", method2 = "adam")
u_rho_asks_helper("u_rho_as")
ks_helper("u_rho_as", method1 = "TMB", method2 = "adam")
u_alpha_xks_helper("u_alpha_x")
ks_helper("u_alpha_x", method1 = "TMB", method2 = "adam")
u_alpha_xsks_helper("u_alpha_xs")
ks_helper("u_alpha_xs", method1 = "TMB", method2 = "adam")
us_alpha_xks_helper("us_alpha_x")
ks_helper("us_alpha_x", method1 = "TMB", method2 = "adam")
us_alpha_xsks_helper("us_alpha_xs")
ks_helper("us_alpha_xs", method1 = "TMB", method2 = "adam")
u_alpha_aks_helper("u_alpha_a")
ks_helper("u_alpha_a", method1 = "TMB", method2 = "adam")
u_alpha_asks_helper("u_alpha_as")
ks_helper("u_alpha_as", method1 = "TMB", method2 = "adam")
u_alpha_xaks_helper("u_alpha_xa")
ks_helper("u_alpha_xa", method1 = "TMB", method2 = "adam")
ui_anc_rho_xks_helper("ui_anc_rho_x")
ks_helper("ui_anc_rho_x", method1 = "TMB", method2 = "adam")
ui_anc_alpha_xks_helper("ui_anc_alpha_x")
ks_helper("ui_anc_alpha_x", method1 = "TMB", method2 = "adam")
log_or_gammaks_helper("log_or_gamma")
ks_helper("log_or_gamma", method1 = "TMB", method2 = "adam")
ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
to_ks_df(x) %>%
group_by(method) %>%
summarise(ks = mean(ks), par = x)
}) %>%
bind_rows() %>%
pivot_wider(names_from = "method", values_from = "ks") %>%
rename(
"Parameter" = "par",
"KS(adam, tmbstan)" = "adam",
"KS(aghq, tmbstan)" = "aghq",
"KS(TMB, tmbstan)" = "TMB",
)
ks_summary %>%
gt::gt() %>%
gt::fmt_number(
columns = starts_with("KS"),
decimals = 3
)
| Parameter | KS(adam, tmbstan) | KS(aghq, tmbstan) | KS(TMB, tmbstan) |
|---|---|---|---|
| beta_rho | 0.091 | 0.090 | 0.085 |
| beta_alpha | 0.123 | 0.101 | 0.103 |
| beta_lambda | 0.042 | 0.061 | 0.074 |
| beta_anc_rho | 0.069 | 0.105 | 0.090 |
| beta_anc_alpha | 0.070 | 0.044 | 0.025 |
| logit_phi_rho_x | 0.536 | 0.536 | 0.118 |
| log_sigma_rho_x | 0.646 | 0.646 | 0.195 |
| logit_phi_rho_xs | 0.510 | 0.510 | 0.111 |
| log_sigma_rho_xs | 0.613 | 0.613 | 0.203 |
| logit_phi_rho_a | 0.552 | 0.552 | 0.080 |
| log_sigma_rho_a | 0.585 | 0.585 | 0.113 |
| logit_phi_rho_as | 0.569 | 0.569 | 0.104 |
| log_sigma_rho_as | 0.589 | 0.589 | 0.134 |
| log_sigma_rho_xa | 0.669 | 0.669 | 0.224 |
| u_rho_x | 0.090 | 0.092 | 0.095 |
| us_rho_x | 0.065 | 0.062 | 0.062 |
| u_rho_xs | 0.124 | 0.117 | 0.121 |
| us_rho_xs | 0.045 | 0.037 | 0.044 |
| u_rho_a | 0.090 | 0.087 | 0.083 |
| u_rho_as | 0.092 | 0.084 | 0.076 |
| logit_phi_alpha_x | 0.531 | 0.531 | 0.107 |
| log_sigma_alpha_x | 0.557 | 0.557 | 0.143 |
| logit_phi_alpha_xs | 0.551 | 0.551 | 0.143 |
| log_sigma_alpha_xs | 0.540 | 0.540 | 0.159 |
| logit_phi_alpha_a | 0.525 | 0.525 | 0.083 |
| log_sigma_alpha_a | 0.541 | 0.541 | 0.111 |
| logit_phi_alpha_as | 0.516 | 0.516 | 0.081 |
| log_sigma_alpha_as | 0.514 | 0.514 | 0.098 |
| log_sigma_alpha_xa | 0.627 | 0.627 | 0.146 |
| u_alpha_x | 0.096 | 0.096 | 0.089 |
| us_alpha_x | 0.087 | 0.084 | 0.083 |
| u_alpha_xs | 0.110 | 0.101 | 0.096 |
| us_alpha_xs | 0.102 | 0.097 | 0.095 |
| u_alpha_a | 0.084 | 0.085 | 0.084 |
| u_alpha_as | 0.112 | 0.096 | 0.102 |
| u_alpha_xa | 0.068 | 0.069 | 0.059 |
| OmegaT_raw | 0.518 | 0.518 | 0.022 |
| log_betaT | 0.689 | 0.689 | 0.244 |
| log_sigma_lambda_x | 0.736 | 0.736 | 0.269 |
| ui_lambda_x | 0.163 | 0.158 | 0.160 |
| log_sigma_ancrho_x | 0.504 | 0.504 | 0.037 |
| log_sigma_ancalpha_x | 0.519 | 0.519 | 0.083 |
| ui_anc_rho_x | 0.044 | 0.055 | 0.057 |
| ui_anc_alpha_x | 0.063 | 0.063 | 0.065 |
| log_sigma_or_gamma | 0.524 | 0.524 | 0.036 |
| log_or_gamma | 0.044 | 0.059 | 0.060 |
r <- adam$quad$obj$env$random
x_names <- names(adam$quad$obj$env$par[r])
theta_names <- names(adam$quad$obj$env$par[-r])
dict <- data.frame(
Parameter = c(x_names, theta_names),
Type = c(rep("Latent field", length(x_names)), rep("Hyperparameters", length(theta_names)))
)
ks_summary <- ks_summary %>%
left_join(dict, by = "Parameter")
summary_ks_plot <- function(ks_summary, method1, method2) {
ks_method1 <- paste0("KS(", method1, ", tmbstan)")
ks_method2 <- paste0("KS(", method2, ", tmbstan)")
scatterplot <- ggplot(ks_summary, aes(x = .data[[ks_method1]], y = .data[[ks_method2]])) +
geom_point(alpha = 0.2) +
annotate(geom = "polygon", x = c(-Inf, Inf, Inf), y = c(-Inf, Inf, -Inf), fill = multi.utils::cbpalette()[3], alpha = 0.1) +
annotate(geom = "polygon", x = c(-Inf, Inf, -Inf), y = c(-Inf, Inf, Inf), fill = multi.utils::cbpalette()[5], alpha = 0.1) +
xlim(0, 1) +
ylim(0, 1) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
labs(x = ks_method1, y = ks_method2) +
theme_minimal()
ks_summary[["KS difference"]] <- ks_summary[[ks_method1]] - ks_summary[[ks_method2]]
jitterplot <- ggplot(ks_summary, aes(x = Type, y = `KS difference`)) +
geom_jitter(width = 0.1, alpha = 0.2) +
labs(x = "", y = paste0(ks_method1, " - ", ks_method2)) +
theme_minimal()
scatterplot + jitterplot
}
summary_ks_plot(ks_summary, "TMB", "aghq")
summary_ks_plot(ks_summary, "TMB", "adam")
summary_ks_plot(ks_summary, "aghq", "adam")
(ks_summary_out <- ks_summary %>%
group_by(Type) %>%
summarise(
TMB = mean(`KS(TMB, tmbstan)`),
aghq = mean(`KS(aghq, tmbstan)`),
adam = mean(`KS(adam, tmbstan)`)
))
Save some things for output:
pdf("ks_summary_tmb_adam.pdf", h = 4, w = 6.25)
summary_ks_plot(ks_summary, "TMB", "adam")
dev.off()
## quartz_off_screen
## 2
saveRDS(ks_summary_out, "ks_summary_out.rds")
The following parameters have KS values greater than 0.5 in both dimensions.
(big_ks <- ks_summary %>%
filter(`KS(aghq, tmbstan)` + `KS(TMB, tmbstan)` > 0.5) %>%
pull(Parameter))
## [1] "logit_phi_rho_x" "log_sigma_rho_x" "logit_phi_rho_xs" "log_sigma_rho_xs" "logit_phi_rho_a" "log_sigma_rho_a" "logit_phi_rho_as" "log_sigma_rho_as"
## [9] "log_sigma_rho_xa" "logit_phi_alpha_x" "log_sigma_alpha_x" "logit_phi_alpha_xs" "log_sigma_alpha_xs" "logit_phi_alpha_a" "log_sigma_alpha_a" "logit_phi_alpha_as"
## [17] "log_sigma_alpha_as" "log_sigma_alpha_xa" "OmegaT_raw" "log_betaT" "log_sigma_lambda_x" "log_sigma_ancrho_x" "log_sigma_ancalpha_x" "log_sigma_or_gamma"
#' To write!
Suppose we have two sets of samples from the posterior.
For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios.
We can extract the TMB objective function for the log-likelihood as follows:
tmb$fit$obj$fn()
## [1] 3327.999
## attr(,"logarithm")
## [1] TRUE
#' To write!